#!/usr/bin/python3

"""
This script reads the AndroidManifest.xml files in the malicious_apk
and benign_apk folders and copies the requested permissions into a
JSON file for later analysis.

The output data format is as follows:
{"features": ["ANDROID.PERMISSION.RECEIVE_BOOT_COMPLETED", ...],
 "apps": {"999eca2457729e371355aea5faa38e14.apk": {"vector": [0,0,0,1], "malicious": [0,1]}, ...}}
"""

import os
from defusedxml import ElementTree
import json
import glob

# Module metadata: handle of the script's original author.
__author__ = 'mwleeds'

# Fully qualified (namespaced) attribute that holds a permission's name.
_ANDROID_NAME_ATTR = '{http://schemas.android.com/apk/res/android}name'


def _read_manifest_permissions(manifest_path):
    """Return the standard Android permissions requested by one manifest.

    Args:
        manifest_path: path to an AndroidManifest.xml file.

    Returns:
        A list of upper-cased permission names starting with
        'ANDROID.PERMISSION', in document order and without duplicates.
        <uses-permission> elements lacking the android:name attribute and
        custom (non 'ANDROID.PERMISSION') permissions are ignored.

    Raises:
        OSError: the manifest cannot be opened.
        ElementTree.ParseError, LookupError, ValueError: the manifest
            cannot be parsed.
    """
    # Binary mode lets the XML parser honour the encoding declared in the
    # document instead of decoding with the locale's default encoding.
    with open(manifest_path, 'rb') as xml_file:
        et = ElementTree.parse(xml_file)
    ordered = []
    seen = set()
    for permission in et.getroot().findall('./uses-permission'):
        permission_name = permission.attrib.get(_ANDROID_NAME_ATTR)
        if permission_name is None:
            continue
        permission_name = permission_name.upper()
        if not permission_name.startswith('ANDROID.PERMISSION'):
            continue  # ignore custom permissions
        if permission_name not in seen:
            seen.add(permission_name)
            ordered.append(permission_name)
    return ordered


def main():
    """Build app_permission_vectors.json from the two APK dataset folders.

    For every '*.apk' entry in ./benign_apk and ./malicious_apk, the
    manifest at '<folder>/<name without .apk>/AndroidManifest.xml' is read.
    Apps whose manifest is missing, unreadable or unparsable are reported on
    stdout and skipped. The result, written to the current working
    directory, has the form
    {'features': [...], 'apps': {apk_name: {'vector': [...], 'malicious': [...]}}}.

    Files are addressed by path, so the process working directory is never
    changed, and APKs are visited in sorted order so the feature order is
    reproducible across runs.

    Raises:
        FileNotFoundError: one of the two dataset folders does not exist.
    """
    all_permissions = []  # permission names in first-seen order (the feature list)
    known_permissions = set()  # same names, for constant-time membership tests
    app_permission_map = {}  # mapping from android app names to sets of permissions
    app_malicious_map = {}  # mapping from android app names to a one-hot label
    for i, directory in enumerate(['benign_apk', 'malicious_apk']):
        if not os.path.isdir(directory):
            # glob would silently yield nothing; a missing dataset folder
            # must stay a hard error.
            raise FileNotFoundError('Missing dataset directory: ' + directory)
        for apk_path in sorted(glob.glob(os.path.join(directory, '*.apk'))):
            filename = os.path.basename(apk_path)
            print('Processing ' + filename)
            # The manifest lives in a sibling folder named after the APK
            # minus its '.apk' suffix.
            manifest_path = os.path.join(directory, filename[:-4], 'AndroidManifest.xml')
            try:
                permissions = _read_manifest_permissions(manifest_path)
            except (ElementTree.ParseError, LookupError, ValueError, OSError):
                # ValueError also covers UnicodeDecodeError and the
                # forbidden-construct errors raised by defusedxml;
                # LookupError covers an unknown declared encoding.
                print('Parsing error encountered for ' + filename)
                continue
            app_name = filename
            # make a one-hot bit vector of length 2. 1st bit set if malicious, otherwise 2nd bit
            app_malicious_map[app_name] = [1, 0] if i else [0, 1]
            app_permission_map[app_name] = set(permissions)
            for permission_name in permissions:
                if permission_name not in known_permissions:
                    known_permissions.add(permission_name)
                    all_permissions.append(permission_name)
    all_apps = {}  # mapping combining app_permission_map and app_malicious_map using bits
    for app_name, requested in app_permission_map.items():
        bit_vector = [1 if p in requested else 0 for p in all_permissions]
        all_apps[app_name] = {'vector': bit_vector, 'malicious': app_malicious_map[app_name]}
    with open('app_permission_vectors.json', 'w') as outfile:
        json.dump({'features': all_permissions, 'apps': all_apps}, outfile)
    print('Wrote data on ' + str(len(all_permissions)) + ' permissions and ' + str(len(all_apps)) + ' apps to a file.')

if __name__ == '__main__':
    # Run the extraction only when executed as a script, not when imported.
    main()
